do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

* Teacher-by-year lookup used in later merges on (ncerdc_id, sy).
* Holds the experience pay level (renamed to experience), educ_lvl and
* gross_pay divided by 1000. sy is shifted forward by one before saving.

	tempfile tchr_exp
	use ncerdc_id sy tchr_exp_pay_level educ_lvl gross_pay ///
		using "$basedata/allLEA_teacher_year_data.dta", clear
	rename tchr_exp_pay_level experience
	replace sy = sy + 1
	replace gross_pay = gross_pay / 1000
	save `tchr_exp', replace
	
	
********************************************************************************
*BUILD THE DATA*****************************************************************
* Start from the application-level choices file, attach imputed value-added,
* school variables, applicant variables and the teacher-year lookup, then
* derive the application-timing indicators used by the robustness samples.

	use "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf.dta", clear

	* Remember the incoming row order so it can be restored after the merge.
	gen sortvar = _n

	* Every row must find its imputed value-added record (asserted below).
	sort itidx
	merge m:1 itidx using "$temp/VAimputed", keepusing(VA1_imputed_0samp VA2_imputed_0samp)
	assert _m==3
	drop _m

	sort sortvar
	drop sortvar

	merge m:1 ncerdc_schlcode jidx jtidx using "$basedata/estimationdata/All_T_J/All_T_J_schoolvar_buff_0_0_lf.dta", gen (school_merge)
	merge m:1 applicant_id app_year using "$basedata/estimationdata/All_T_J/All_T_J_teachvar_buff_0_0_lf.dta", gen(app_merge)
		gen sy=app_year+1
	merge m:1 ncerdc_id sy using `tchr_exp', gen(teach_merge)
	drop experience

	* Discard rows that exist only in the teacher-year lookup.
	keep if teach_merge==1 | teach_merge==3

	gen sortvar = _n

	* Latest application date among hired applicants within each jtidx.
	gen hired_date = applied_date if hired==1
	bys jtidx: egen max_hired_date = max(hired_date)

	* Split applications at the midpoint of the posting window.
	* Stata orders missing values above every number, so an unguarded
	* "applied_date <= midpoint" is TRUE whenever PostingDate or CloseDate is
	* missing. Require both dates so such rows fall in neither half, which is
	* how second_half already treats them.
	gen first_half = (applied_date<=PostingDate+(CloseDate-PostingDate)/2) & !missing(PostingDate, CloseDate)
	gen second_half =  (applied_date>PostingDate+(CloseDate-PostingDate)/2 & applied_date!=.)

	* abs() of a missing difference is missing, and missing<=14 is false, so
	* rows without a hire date or application date are coded 0 here.
	gen within_14_days_hire = abs(applied_date-max_hired_date)<=14

	* Undo the re-ordering caused by bys.
	sort sortvar
	drop sortvar
	

* _imp versions of the two value-added measures: the estimated value where
* has_va_ma_preY is not 0, the imputed value where it equals 0.
forvalues k = 1/2 {
	gen mu_jt_m`k'_hat_preY_ma_imp = mu_jt_m`k'_hat_preY_ma
	replace mu_jt_m`k'_hat_preY_ma_imp = VA`k'_imputed_0samp if has_va_ma_preY==0
}

* Flag rows whose _imp measures come from the imputation.
gen imputedteacher = (has_va_ma_preY==0)

	
* Fixed-effect identifier: one id per (job_id, app_year) combination
egen application_id = group(job_id app_year)

* Commute time with system-missing values set to 0, plus a missing flag
gen commute = cond(commute_time==., 0, commute_time)
gen commute_miss = (commute_time==.)

* Experience bins (2-3, 4-6, above 6); system-missing experience is 0 in every
* bin and 1 in exp_missing
gen exp_2 = inlist(tchr_exp, 2, 3)
gen exp_4 = inrange(tchr_exp, 4, 6)
gen exp_6 = (tchr_exp>6 & tchr_exp!=.)
gen exp_missing = (tchr_exp==.)

* Education levels 5 and 7 are coded as masters
gen masters = inlist(educ_lvl, 5, 7)

* Race: flag rows with missing black, then zero-fill both race dummies there
gen race_missing = (black==.)
foreach v of varlist black hispanic {
	replace `v' = 0 if race_missing==1
}

* Sex: same treatment
gen sex_missing = (female==.)
replace female = 0 if sex_missing==1

* EVAAS score: same treatment
gen evaas_missing = (evaas_score==.)
replace evaas_score = 0 if evaas_missing==1

* Total of the two mean_n components, then identical copies of all three
* variables under each of the listed suffixes.
gen mean_n_ma = mean_n1_ma + mean_n2_ma
foreach tag in _cfr _homog _sM _imp {
	foreach stub in mean_n1_ma mean_n2_ma mean_n_ma {
		gen `stub'`tag' = `stub'
	}
}


* For every specification suffix: app_year-level means of the two mean_n
* components, shares, the value-added deviation term and the value-added term
* weighted by the app_year-level means, each with a has_ flag and missing
* values set to 0.
foreach s in "" "_race" "_ach" "_cfr" "_homog" "_sM" "_imp" {

	* means within application year
	forvalues k = 1/2 {
		bys app_year: egen mean_n`k'_ma_district`s' = mean(mean_n`k'_ma`s')
	}
	gen mean_n_ma_district`s' = mean_n1_ma_district`s'+mean_n2_ma_district`s'

	* share of component 1 at the app_year level and at the row level
	gen district_p1_ma`s' = mean_n1_ma_district`s' / ( mean_n1_ma_district`s'+ mean_n2_ma_district`s')
	gen school_p1_ma`s' = mean_n1_ma`s' / ( mean_n1_ma`s'+ mean_n2_ma`s')

	* value-added weighted by the deviation of each component from its mean
	gen va_ma_preY_dev`s' = (mean_n1_ma`s'-mean_n1_ma_district`s')*mu_jt_m1_hat_preY_ma`s' + (mean_n2_ma`s'-mean_n2_ma_district`s')*mu_jt_m2_hat_preY_ma`s'

	* value-added weighted by the app_year means (replaces any existing copy)
	capture drop mu_jt_preY_mean_school_ma`s'
	gen mu_jt_preY_mean_school_ma`s' = mu_jt_m1_hat_preY_ma`s' * mean_n1_ma_district`s' + ///
		mu_jt_m2_hat_preY_ma`s' * mean_n2_ma_district`s'

	* availability flags first, then zero-fill the missing values
	gen has_va_ma_preY_dev`s' = (va_ma_preY_dev`s' != .)
	gen has_mu_jt_preY_mean_s_ma`s' = (mu_jt_preY_mean_school_ma`s' != .)

	replace va_ma_preY_dev`s' = 0 if va_ma_preY_dev`s' == .
	replace mu_jt_preY_mean_school_ma`s' = 0 if mu_jt_preY_mean_school_ma`s' == .
}

* Build the _C and _cfrC variable families from the unsuffixed and _cfr
* families respectively.
foreach src in "" "_cfr" {

	* target suffix is _C for the unsuffixed family and _cfrC for _cfr
	local out "`src'C"
	if "`src'" == "" {
		local out "_C"
	}

	* row-level share of each component times the app_year-level total
	forvalues k = 1/2 {
		gen mean_n`k'_ma`out' = (mean_n`k'_ma`src'/mean_n_ma`src')*(mean_n1_ma_district`src' + mean_n2_ma_district`src')
	}
	gen mean_n_ma`out' = mean_n1_ma`out' + mean_n2_ma`out'

	* these carry over unchanged from the source family
	foreach stub in mean_n1_ma_district mean_n2_ma_district mean_n_ma_district district_p1_ma school_p1_ma {
		gen `stub'`out' = `stub'`src'
	}

	* the deviation term is recomputed from the SOURCE family components
	gen va_ma_preY_dev`out' = (mean_n1_ma`src'-mean_n1_ma_district`src')*mu_jt_m1_hat_preY_ma`src' + (mean_n2_ma`src'-mean_n2_ma_district`src')*mu_jt_m2_hat_preY_ma`src'

	capture drop mu_jt_preY_mean_school_ma`out'
	gen mu_jt_preY_mean_school_ma`out' = mu_jt_m1_hat_preY_ma`src' * mean_n1_ma_district`out' + ///
		mu_jt_m2_hat_preY_ma`src' * mean_n2_ma_district`out'

	* availability flags first, then zero-fill the missing values
	gen has_va_ma_preY_dev`out' = (va_ma_preY_dev`out' != .)
	gen has_mu_jt_preY_mean_s_ma`out' = (mu_jt_preY_mean_school_ma`out' != .)

	replace va_ma_preY_dev`out' = 0 if va_ma_preY_dev`out' == .
	replace mu_jt_preY_mean_school_ma`out' = 0 if mu_jt_preY_mean_school_ma`out' == .
}



 
* Restrict to rows with applied equal to 1
drop if applied!=1

* Teacher race times the matching frac_ variable; missing products become 0
gen black_teachXblack_share = frac_black * black
gen hisp_teachXhisp_share = frac_hisp * hispanic
foreach v of varlist black_teachXblack_share hisp_teachXhisp_share {
	replace `v' = 0 if `v'==.
}

* Interact every teacher covariate with titleI
local tcovars exp_2 exp_4 exp_6 exp_missing masters black hispanic female ///
	black_teachXblack_share sex_missing race_missing hisp_teachXhisp_share ///
	evaas_missing evaas_score praxis praxisMissing grad_degree grad_degreeMissing ///
	certified certifiedMissing licensed licensedMissing
foreach x of local tcovars {
	gen `x'_T1 = `x' * titleI
}

* Shorten the mean_school variable name to mean_s for every suffix, then
* interact the value-added terms and the has_ flag with titleI
foreach s in "" "_race" "_ach" "_cfr" "_homog" "_sM" "_C" "_cfrC" "_imp" {

	rename mu_jt_preY_mean_school_ma`s' mu_jt_preY_mean_s_ma`s'

	foreach x in va_ma_preY_dev`s' mu_jt_preY_mean_s_ma`s' has_va_ma_preY_dev`s' {
		gen `x'_T1 = `x' * titleI
	}
}

* Report (but do not enforce) uniqueness of the pair, order rows and save
duplicates report itidx jtidx
sort jtidx itidx

save "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", replace



* For each specification suffix, write four CSV files:
*   ..._principal_ind<suffix>.csv       re-numbered teacher-sample ids for every
*                                       principal-sample row (0 when unmatched)
*   ..._principalExtra<suffix>.csv      covariates from the principal sample
*   ..._principalExtra_cf<suffix>.csv   the same covariate layout built from the
*                                       teacher-preference sample
*   ..._principal_cf_n<suffix>.csv      mean_n and mu_jt columns plus e_ma from
*                                       the teacher-preference sample
* For the _imp suffix only app_year 2015 is kept and imputedteacher is added to
* the exported columns through the extravar macro.
foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" "_C" "_cfrC" "_imp" {
	
	
	
* ---- Step 1: crosswalk from original ids to consecutive ids ----------------
use "$basedata/estimationdata/All_T_J/teach_pref_est_data.dta", clear

keep if estsampall`suff2'==1

	local extravar = ""
	if "`suff2'"=="_imp" {
		local extravar = "imputedteacher"
		keep if app_year==2015
	}

* sortvar and current_school_first are created here but are not among the
* variables kept for the crosswalk below.
set seed 332899
gen sortvar = runiform()
gsort itidx -current_school sortvar
gen current_school_first = current_school==1 & itidx[_n-1]!=itidx

sort itidx jtidx

* Keep each original id as <name>_orig and re-number it within the estimation
* sample with egen group().
foreach var in itidx jtidx iidx jidx {
    ren `var' `var'_orig
	egen `var' = group(`var'_orig)
}

* The crosswalk must not contain duplicate rows.
keep itidx jtidx itidx_orig jtidx_orig
duplicates tag, gen(dupvar)
assert dupvar==0
drop dupvar

sort itidx_orig jtidx_orig

tempfile tempxwalk
save `tempxwalk', replace

* ---- Step 2: index file aligned with the principal sample ------------------
use "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", clear

	local extravar = ""
	if "`suff2'"=="_imp" {
		local extravar = "imputedteacher"
		keep if app_year==2015
	}

duplicates report itidx jtidx
sort jtidx itidx

* sortvar records the jtidx itidx row order so it can be restored after the
* merge; crosswalk-only rows are dropped.
gen sortvar = _n
keep itidx jtidx sortvar
ren itidx itidx_orig
ren jtidx jtidx_orig
sort itidx_orig jtidx_orig
merge 1:1 itidx_orig jtidx_orig using `tempxwalk'
drop if _m==2

sort sortvar
drop sortvar

* Principal-sample rows with no crosswalk match get id 0 in both columns; the
* asserts check that the two ids are always zero together.
assert itidx_orig!=. & jtidx_orig!=.
keep itidx jtidx
replace itidx = 0 if itidx==.
replace jtidx = 0 if jtidx==.

assert itidx==0 if jtidx==0
assert jtidx==0 if itidx==0

order itidx jtidx

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principal_ind`suff2'.csv", comma replace



* ---- Step 3: principal-sample covariates -----------------------------------
* Same file, same filter and same sort as step 2, so rows line up with the
* index file written above.
use "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", clear

	local extravar = ""
	if "`suff2'"=="_imp" {
		local extravar = "imputedteacher"
		keep if app_year==2015
	}

duplicates report itidx jtidx
sort jtidx itidx

* Consecutive school id within the rows currently in memory.
egen sidx = group(ncerdc_schlcode)


* NOTE(review): the data restored below are replaced by the next use command,
* so this preserve and restore pair looks unnecessary - confirm before removing.
preserve

keep jtidx sidx positive_assessment va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1 `extravar'
order jtidx sidx positive_assessment va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1 `extravar'

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principalExtra`suff2'.csv", comma replace


restore





* ---- Step 4: same covariate layout from the teacher-preference sample ------
* create a similar dataset but for the teacher preferences sample
* NOTE(review): the two lookup tempfiles below do not depend on suff2 but are
* rebuilt on every pass of the loop.
	use "$basedata/FOCAL_applicant_year_data", clear
	keep applicant_id sy e_ma
	sort applicant_id sy
	tempfile tempexp
	save `tempexp', replace
	
use "$basedata/FOCAL_applicant_data", clear

keep applicant_id praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing
sort applicant_id
tempfile tempteacher
save `tempteacher', replace

use "$basedata/estimationdata/All_T_J/teach_pref_est_data.dta", clear

keep if estsampall`suff2'==1

	local extravar = ""
	if "`suff2'"=="_imp" {
		local extravar = "imputedteacher"
		keep if app_year==2015
	}

sort itidx jtidx

* Same seed and same steps as in step 1.
set seed 332899
gen sortvar = runiform()
gsort itidx -current_school sortvar
gen current_school_first = current_school==1 & itidx[_n-1]!=itidx

sort itidx jtidx

foreach var in itidx jtidx iidx jidx {
    ren `var' `var'_orig
	egen `var' = group(`var'_orig)
}



* Attach e_ma, the applicant credential variables and the teacher-year lookup;
* rows found only in a lookup file are dropped after each merge.
* NOTE(review): the merge type is spelled n:1 here and m:1 earlier in this
* file - confirm the two spellings are treated the same by the Stata version
* in use.
gen sy = app_year+1

sort applicant_id sy
merge n:1 applicant_id sy using `tempexp'
drop if _m==2
drop _m

sort applicant_id
merge n:1 applicant_id using `tempteacher'
drop if _m==2
drop _m

sort ncerdc_id sy
merge n:1 ncerdc_id sy using `tchr_exp'
drop if _m==2
drop _m

cap drop mu_jt_preY_mean_school_ma`suff2'

* The _C and _cfrC inputs are created here from the unsuffixed and _cfr
* variables; for the other suffixes the inputs used below must already be in
* the loaded file.
if "`suff2'" == "_C" {
	gen mean_n1_ma_district_C = mean_n1_ma_district
	gen mean_n2_ma_district_C = mean_n2_ma_district
	gen va_ma_dev_C = va_ma_dev
	gen mu_jt_mean_school_ma_C = mu_jt_m1_hat_preY_ma * mean_n1_ma_district_C + ///
		mu_jt_m2_hat_preY_ma * mean_n2_ma_district_C
}
if "`suff2'" == "_cfrC" {
	gen mean_n1_ma_district_cfrC = mean_n1_ma_district_cfr
	gen mean_n2_ma_district_cfrC = mean_n2_ma_district_cfr
	gen va_ma_dev_cfrC = va_ma_dev_cfr
	gen mu_jt_mean_school_ma_cfrC = mu_jt_m1_hat_preY_ma_cfr * mean_n1_ma_district_cfrC + ///
		mu_jt_m2_hat_preY_ma_cfr * mean_n2_ma_district_cfrC
}


* Component shares at the district and row level, and the district total.
gen p1_district`suff2' = mean_n1_ma_district`suff2'/(mean_n1_ma_district`suff2'+mean_n2_ma_district`suff2')
gen p2_district`suff2' = 1-p1_district`suff2'
gen p1_school`suff2' = (mean_n1_ma`suff2')/(mean_n1_ma`suff2'+mean_n2_ma`suff2')
gen p2_school`suff2' = 1-p1_school`suff2'
gen mean_n_ma_district`suff2' = mean_n1_ma_district`suff2'+mean_n2_ma_district`suff2'

* Copy teacher-sample variables into the names used in the principal-sample
* export. sex_missing and race_missing are set to 0 and the has_ flag to 1 on
* every row.
gen va_ma_preY_dev`suff2' = va_ma_dev`suff2'

gen mu_jt_preY_mean_s_ma`suff2' = mu_jt_mean_school_ma`suff2'
gen masters = educ_lvl==5|educ_lvl==7
gen black_teachXblack_share = blackXfrac_black
gen hisp_teachXhisp_share = hispanicXfrac_hisp
gen sex_missing = 0
gen race_missing = 0
gen has_va_ma_preY_dev`suff2' = 1



ren (exp2 exp4 exp6) (exp_2 exp_4 exp_6)

* Interactions with titleI, mirroring the principal-sample _T1 variables.
foreach var in va_ma_preY_dev`suff2' mu_jt_preY_mean_s_ma`suff2'  exp_2 exp_4 exp_6 exp_missing masters black hispanic female black_teachXblack_share sex_missing race_missing has_va_ma_preY_dev`suff2' hisp_teachXhisp_share evaas_score evaas_missing  praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing {
    gen `var'_T1 = `var' * titleI
}

* The gsort is immediately followed by a sort on itidx jtidx, which determines
* the row order of both exports below.
gsort itidx -current_school sortvar
sort itidx jtidx



preserve
keep itidx jtidx jtidx_orig app_year va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1 `extravar'
order itidx jtidx jtidx_orig app_year va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1 `extravar'

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principalExtra_cf`suff2'.csv", comma replace
restore



* ---- Step 5: mean_n and mu_jt inputs ----------------------------------------
preserve


* Overwrite the two mean_n components with the row-level share times the
* district total, so they sum to the district total on every row.
cap drop mean_n_ma_district`suff2'
gen mean_n_ma_district`suff2' = mean_n1_ma_district`suff2' + mean_n2_ma_district`suff2'
gen school_p1_ma`suff2' = mean_n1_ma`suff2' / ( mean_n1_ma`suff2'+ mean_n2_ma`suff2')
replace mean_n1_ma`suff2' = school_p1_ma`suff2'* mean_n_ma_district`suff2'
replace mean_n2_ma`suff2' = (1-school_p1_ma`suff2')* mean_n_ma_district`suff2'


keep mean_n1_ma`suff2' mean_n2_ma`suff2' mu_jt_m1_hat_preY_ma`suff2' mu_jt_m2_hat_preY_ma`suff2' e_ma `extravar'
order mean_n1_ma`suff2' mean_n2_ma`suff2' mu_jt_m1_hat_preY_ma`suff2' mu_jt_m2_hat_preY_ma`suff2' e_ma `extravar'
outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principal_cf_n`suff2'.csv", comma replace
restore

}


*** sample changes just for the baseline model
* Repeats the four CSV exports of the suffix loop for the unsuffixed
* specification (suff2 is set to empty) under eight alternative samples.
*   day1, buff7, donut, multiapp   change the teacher-preference sample
*   first, second, wi14            restrict the principal sample
*   hired                          replaces positive_assessment with hired
* Output file names end in _<suff1>.csv.

foreach suff1 in "day1" "buff7" "donut" "multiapp" "first" "second" "wi14" "hired" {

local suff2 = ""
	
	
* ---- Step 1: crosswalk from original ids to consecutive ids ----------------
use "$basedata/estimationdata/All_T_J/teach_pref_est_data.dta", clear

keep if estsampall`suff2'==1

* day1: use choicesetday1 as the choice set, then drop every itidx and every
* jtidx whose maximum choiceset is 0.
if "`suff1'"=="day1" {

replace choiceset = choicesetday1

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}

* buff7: same as day1 but with choiceset7.
if "`suff1'"=="buff7" {
replace choiceset = choiceset7

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}

* donut: remove from the choice set rows where first_person_app is within 7
* days of PostingDate, and rows with current_school equal to 1.
* NOTE(review): abs() of a missing difference is missing and missing<=7 is
* false, so rows with a missing date stay in the choice set - confirm intended.
if "`suff1'"=="donut" {
replace choiceset = 0 if abs(first_person_app-PostingDate)<=7 | current_school==1

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}
* multiapp: keep only itidx values whose applied total is above 1.
if "`suff1'"=="multiapp" {
bys itidx: egen num_apps = sum(applied)
keep if num_apps>1
drop num_apps 

sort itidx jtidx
}

* sortvar and current_school_first are created here but are not among the
* variables kept for the crosswalk below.
set seed 332899
gen sortvar = runiform()
gsort itidx -current_school sortvar
gen current_school_first = current_school==1 & itidx[_n-1]!=itidx

sort itidx jtidx

* Keep each original id as <name>_orig and re-number it within the current
* sample with egen group().
foreach var in itidx jtidx iidx jidx {
    ren `var' `var'_orig
	egen `var' = group(`var'_orig)
}

* The crosswalk must not contain duplicate rows.
keep itidx jtidx itidx_orig jtidx_orig
duplicates tag, gen(dupvar)
assert dupvar==0
drop dupvar

sort itidx_orig jtidx_orig

tempfile tempxwalk
save `tempxwalk', replace

* ---- Step 2: index file aligned with the principal sample ------------------
use "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", clear

if "`suff1'"=="first" {
	keep if first_half==1
}
if "`suff1'"=="second" {
	keep if second_half==1
}
if "`suff1'"=="wi14" {
	keep if within_14_days_hire==1
}
* positive_assessment is not among the variables kept for the index file, so
* this replace does not affect the index export.
if "`suff1'"=="hired" {
	replace positive_assessment = hired
}

duplicates report itidx jtidx
sort jtidx itidx

* sortvar records the jtidx itidx row order so it can be restored after the
* merge; crosswalk-only rows are dropped.
gen sortvar = _n
keep itidx jtidx sortvar
ren itidx itidx_orig
ren jtidx jtidx_orig
sort itidx_orig jtidx_orig
merge 1:1 itidx_orig jtidx_orig using `tempxwalk'
drop if _m==2

sort sortvar
drop sortvar

* Principal-sample rows with no crosswalk match get id 0 in both columns; the
* asserts check that the two ids are always zero together.
assert itidx_orig!=. & jtidx_orig!=.
keep itidx jtidx
replace itidx = 0 if itidx==.
replace jtidx = 0 if jtidx==.

assert itidx==0 if jtidx==0
assert jtidx==0 if itidx==0

order itidx jtidx

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principal_ind_`suff1'.csv", comma replace



* ---- Step 3: principal-sample covariates -----------------------------------
* Same file, same filters and same sort as step 2, so rows line up with the
* index file written above.
use "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", clear

if "`suff1'"=="first" {
	keep if first_half==1
}
if "`suff1'"=="second" {
	keep if second_half==1
}
if "`suff1'"=="wi14" {
	keep if within_14_days_hire==1
}
if "`suff1'"=="hired" {
	replace positive_assessment = hired
}

duplicates report itidx jtidx
sort jtidx itidx

* Consecutive school id within the rows currently in memory.
egen sidx = group(ncerdc_schlcode)


* NOTE(review): the data restored below are replaced by the next use command,
* so this preserve and restore pair looks unnecessary - confirm before removing.
preserve

keep jtidx sidx positive_assessment va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1
order jtidx sidx positive_assessment va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principalExtra_`suff1'.csv", comma replace


restore





* ---- Step 4: same covariate layout from the teacher-preference sample ------
* create a similar dataset but for the teacher preferences sample
* NOTE(review): the two lookup tempfiles below do not depend on suff1 but are
* rebuilt on every pass of the loop.
	use "$basedata/FOCAL_applicant_year_data", clear
	keep applicant_id sy e_ma
	sort applicant_id sy
	tempfile tempexp
	save `tempexp', replace
	
use "$basedata/FOCAL_applicant_data", clear

keep applicant_id praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing
sort applicant_id
tempfile tempteacher
save `tempteacher', replace

use "$basedata/estimationdata/All_T_J/teach_pref_est_data.dta", clear

keep if estsampall`suff2'==1

* The four sample changes below repeat those applied in step 1, so the
* re-numbered ids here match the crosswalk.
if "`suff1'"=="day1" {

replace choiceset = choicesetday1

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}

if "`suff1'"=="buff7" {
replace choiceset = choiceset7

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}

if "`suff1'"=="donut" {
replace choiceset = 0 if abs(first_person_app-PostingDate)<=7 | current_school==1

bys itidx: egen maxchoiceseti = max(choiceset)
bys jtidx: egen maxchoicesetj = max(choiceset)

drop if maxchoiceseti==0 | maxchoicesetj==0

sort itidx jtidx
}

if "`suff1'"=="multiapp" {
bys itidx: egen num_apps = sum(applied)
keep if num_apps>1
drop num_apps 

sort itidx jtidx
}

* Same seed and same steps as in step 1.
set seed 332899
gen sortvar = runiform()
gsort itidx -current_school sortvar
gen current_school_first = current_school==1 & itidx[_n-1]!=itidx

sort itidx jtidx

foreach var in itidx jtidx iidx jidx {
    ren `var' `var'_orig
	egen `var' = group(`var'_orig)
}



* Attach e_ma, the applicant credential variables and the teacher-year lookup;
* rows found only in a lookup file are dropped after each merge.
* NOTE(review): the merge type is spelled n:1 here and m:1 earlier in this
* file - confirm the two spellings are treated the same by the Stata version
* in use.
gen sy = app_year+1

sort applicant_id sy
merge n:1 applicant_id sy using `tempexp'
drop if _m==2
drop _m

sort applicant_id
merge n:1 applicant_id using `tempteacher'
drop if _m==2
drop _m

sort ncerdc_id sy
merge n:1 ncerdc_id sy using `tchr_exp'
drop if _m==2
drop _m

cap drop mu_jt_preY_mean_school_ma`suff2'


* Component shares at the district and row level, and the district total.
gen p1_district`suff2' = mean_n1_ma_district`suff2'/(mean_n1_ma_district`suff2'+mean_n2_ma_district`suff2')
gen p2_district`suff2' = 1-p1_district`suff2'
gen p1_school`suff2' = (mean_n1_ma`suff2')/(mean_n1_ma`suff2'+mean_n2_ma`suff2')
gen p2_school`suff2' = 1-p1_school`suff2'
gen mean_n_ma_district`suff2' = mean_n1_ma_district`suff2'+mean_n2_ma_district`suff2'

* Copy teacher-sample variables into the names used in the principal-sample
* export. sex_missing and race_missing are set to 0 and the has_ flag to 1 on
* every row.
gen va_ma_preY_dev`suff2' = va_ma_dev`suff2'

gen mu_jt_preY_mean_s_ma`suff2' = mu_jt_mean_school_ma`suff2'
gen masters = educ_lvl==5|educ_lvl==7
gen black_teachXblack_share = blackXfrac_black
gen hisp_teachXhisp_share = hispanicXfrac_hisp
gen sex_missing = 0
gen race_missing = 0
gen has_va_ma_preY_dev`suff2' = 1



ren (exp2 exp4 exp6) (exp_2 exp_4 exp_6)

* Interactions with titleI, mirroring the principal-sample _T1 variables.
foreach var in va_ma_preY_dev`suff2' mu_jt_preY_mean_s_ma`suff2'  exp_2 exp_4 exp_6 exp_missing masters black hispanic female black_teachXblack_share sex_missing race_missing has_va_ma_preY_dev`suff2' hisp_teachXhisp_share evaas_score evaas_missing praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing {
    gen `var'_T1 = `var' * titleI
}

* The gsort is immediately followed by a sort on itidx jtidx, which determines
* the row order of both exports below.
gsort itidx -current_school sortvar
sort itidx jtidx



preserve
keep itidx jtidx jtidx_orig app_year va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1
order itidx jtidx jtidx_orig app_year va_ma_preY_dev`suff2' va_ma_preY_dev`suff2'_T1 mu_jt_preY_mean_s_ma`suff2' mu_jt_preY_mean_s_ma`suff2'_T1 exp_2 exp_2_T1 exp_4 exp_4_T1 exp_6 exp_6_T1 ///
	exp_missing exp_missing_T1 masters masters_T1 black black_teachXblack_share black_T1 black_teachXblack_share_T1 hispanic hispanic_T1 hisp_teachXhisp_share ///
	hisp_teachXhisp_share_T1 female female_T1 sex_missing sex_missing_T1 race_missing race_missing_T1 has_va_ma_preY_dev`suff2' has_va_ma_preY_dev`suff2'_T1 titleI ///
	praxis praxis_T1 praxisMissing praxisMissing_T1 grad_degree grad_degree_T1 grad_degreeMissing grad_degreeMissing_T1 certified certified_T1 certifiedMissing certifiedMissing_T1 ///
	licensed licensed_T1 licensedMissing licensedMissing_T1

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principalExtra_cf_`suff1'.csv", comma replace
restore



* ---- Step 5: mean_n and mu_jt inputs ----------------------------------------
preserve

* Overwrite the two mean_n components with the row-level share times the
* district total. The loop covers six suffixes, but only the unsuffixed, _race
* and _ach columns are kept for the export below.
foreach suff3 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
cap drop mean_n_ma_district`suff3'
gen mean_n_ma_district`suff3' = mean_n1_ma_district`suff3' + mean_n2_ma_district`suff3'
gen school_p1_ma`suff3' = mean_n1_ma`suff3' / ( mean_n1_ma`suff3'+ mean_n2_ma`suff3')
replace mean_n1_ma`suff3' = school_p1_ma`suff3'* mean_n_ma_district`suff3'
replace mean_n2_ma`suff3' = (1-school_p1_ma`suff3')* mean_n_ma_district`suff3'
}

keep mean_n1_ma mean_n2_ma mean_n1_ma_race mean_n2_ma_race mean_n1_ma_ach mean_n2_ma_ach mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_m1_hat_preY_ma_race mu_jt_m2_hat_preY_ma_race mu_jt_m1_hat_preY_ma_ach mu_jt_m2_hat_preY_ma_ach e_ma
order mean_n1_ma mean_n2_ma mean_n1_ma_race mean_n2_ma_race mean_n1_ma_ach mean_n2_ma_ach mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_m1_hat_preY_ma_race mu_jt_m2_hat_preY_ma_race mu_jt_m1_hat_preY_ma_ach mu_jt_m2_hat_preY_ma_ach e_ma
outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principal_cf_n_`suff1'.csv", comma replace
restore
}

** month data
* Calendar year and month of PostingDate for every principal-sample row,
* written in jtidx itidx order.

use itidx jtidx PostingDate using "$basedata/estimationdata/All_T_J/principal_pref_est_data.dta", clear

duplicates report itidx jtidx
sort jtidx itidx

generate posting_year = year(PostingDate)
generate posting_month = month(PostingDate)
drop PostingDate

local monthcols itidx jtidx posting_year posting_month
order `monthcols'

outsheet using "$basedata/estimationdata/All_T_J/All_T_J_choices_buff_0_0_lf_principal_month.csv", comma replace

